import pandas as pd
from datasets import load_dataset
from datasets import Dataset
from config import cfg
import os

# Resolve the parquet file inside the configured dataset directory and load
# it into a DataFrame.
data_file_path = os.path.join(cfg.dataset_dir, '21183.parquet')
rd_ds = pd.read_parquet(path=data_file_path)

# Quick sanity check: show the first rows and the available column labels.
print(rd_ds.head())
print('columns: {}'.format(rd_ds.columns))

# Build one prompt per row from the 'product' and 'category' columns:
#   "<prefix><product>, belonging to category: <category>"
# NOTE(review): presumably both columns hold plain strings — confirm against
# the parquet schema.
_instruction_prefix = 'Create a detailed description for the following product: '
_category_joiner = ', belonging to category: '
rd_ds['instruction'] = (
    _instruction_prefix
    + rd_ds['product']
    + _category_joiner
    + rd_ds['category']
)

# Preview the first few generated instructions.
print(rd_ds['instruction'].head())
